# Direct outputs not eligible for public release

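# E.g., to run (replace <this_script>.R with this file's name; avoid spaces
# inside the argument so that it reaches R as a single token):
# nohup R CMD BATCH --no-save --no-restore '--args winsorize_quantiles=c(0.001,0.005,0.01,0.02)' <this_script>.R & #nolint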

# Evaluate each trailing command-line argument as an R expression, so that
# e.g. `--args winsorize_quantiles=c(...)` defines `winsorize_quantiles` for
# the rest of the script.
# NOTE(review): eval(parse()) executes whatever text is passed on the command
# line; only launch this script with trusted arguments.
args <- commandArgs(TRUE)
# seq_along() gives an empty loop when no arguments are supplied, whereas
# 1:length(args) would iterate over c(1, 0) and fail on args[[1]].
for (i in seq_along(args)) {
    eval(parse(text = args[[i]]))
}

# GLOBAL SETTINGS --------------------------------------------------------------

# Session-wide printing and diagnostic options, set in a single call.
options(
    warn = 1,                          # print each warning as it occurs
    show.error.locations = TRUE,       # include source locations in errors
    max.print = .Machine$integer.max,  # never truncate printed output
    digits = 16,                       # print numbers with 16 digits
    scipen = 999                       # avoid scientific notation
)

# Main random-number stream: L'Ecuyer-CMRG generator with a fixed seed, so
# that runs are reproducible.
seed <- 818675309L
RNGkind("L'Ecuyer-CMRG")
set.seed(seed)

# PACKAGES ---------------------------------------------------------------------
library(data.table)
library(Matrix)
library(zoo)
library(multiwayvcov)
library(lmtest)
library(checkmate)
library(futile.logger)
library(lfe)

library(remotes)
# Install eventStudy from GitHub only when it is not already available, so
# that routine runs do not depend on network access and do not change the
# installed package version between runs.
if (!requireNamespace("eventStudy", quietly = TRUE)) {
    remotes::install_github("setzler/eventStudy/eventStudy")
}
library(eventStudy) # for DiD-IV

# PACKAGE SETTINGS -------------------------------------------------------------

# data.table: when a table is printed, show each column's type on top along
# with the table's keys; restrict data.table itself to a single thread.
options(datatable.print.keys = TRUE, datatable.print.class = TRUE)
setDTthreads(1L)

# BEGIN FILE -------------------------------------------------------------------

# Load the project's utility code; presumably this defines Wald_ES2(), which
# MakeEventStudy() calls below -- TODO confirm against wald_es.R.
# local = TRUE evaluates the file in the environment source() is called from.
source("~/code/0-utility-functions/wald_es.R", local = TRUE)
# Name of the outcome column. MakeEventStudy() reads this global: the column
# is zero-filled, winsorized, passed to Wald_ES2() as `outcomevar`, and
# embedded in the output file name.
outcome <- "db_w2_wages"

# Estimate the stacked event study for `outcome` after winsorizing its upper
# tail, and save the estimates to disk.
#
# Args:
#   winsorize_quantile: A single number in [0, 1]. Within each
#     (win_yr, tax_yr) cell, values of the outcome above the
#     (1 - winsorize_quantile) quantile are replaced by that quantile.
#
# Returns:
#   `winsorize_quantile`, unchanged. The estimates are written to
#   ~/estimation-output/ as an .rds file whose name embeds the outcome name
#   and the winsorization level.
#
# Reads the global `outcome` (a column name) and calls Wald_ES2(), which is
# presumably provided by the sourced wald_es.R -- TODO confirm.
MakeEventStudy <- function(winsorize_quantile) {

    # Fail early on a malformed level instead of part-way through the run.
    stopifnot(
        is.numeric(winsorize_quantile),
        length(winsorize_quantile) == 1L,
        !is.na(winsorize_quantile),
        winsorize_quantile >= 0,
        winsorize_quantile <= 1
    )

    # Read in lottery panel data
    lottery_panel <- readRDS("~/population-panel-data/lottery_panel_data.rds")
    # A data.table restored by readRDS() has no spare column slots; setDT()
    # re-allocates them so the := calls below can add columns by reference.
    setDT(lottery_panel)

    # For a numeric outcome, replace missing values with zero.
    # is.numeric() covers integer and double and, unlike comparing class()
    # with ==, always yields a single TRUE/FALSE for the scalar `if`.
    if (is.numeric(lottery_panel[[outcome]])) {
        lottery_panel[is.na(get(outcome)), (outcome) := 0]
    }

    # Each treated cohort will be restricted to age 21-64 in year 0 and
    # only control units age 21-64 in the same calendar year will be used
    lottery_panel[
        ,
        age_case := as.logical(between(age, 21, 64, incbounds = TRUE))
    ]

    # Now, winsorize the top of the outcome (since non-negative)
    # within each cohort X calendar year
    lottery_panel[, cell := .GRP, by = .(win_yr, tax_yr)]
    for (cc in sort(unique(lottery_panel$cell))) {
        # Upper cutoff for this cell, computed before any value in the cell
        # is capped.
        top_q <- quantile(
            lottery_panel[cell == cc][[outcome]],
            probs = 1 - winsorize_quantile,
            names = FALSE
        )

        lottery_panel[cell == cc & get(outcome) > top_q, (outcome) := top_q]
    }

    # Run stacked event-study regression. A copy is passed so that the
    # panel held here is not modified by reference inside Wald_ES2().
    did_results <-
        Wald_ES2(
            long_data = copy(lottery_panel),
            outcomevar = outcome,
            unit_var = "tin",
            cal_time_var = "tax_yr",
            onset_time_var = "win_yr",
            cluster_vars = "tin",
            omitted_event_time = -2,
            discrete_covars = "age",
            control_subset_var = "age_case",
            control_subset_event_time = 0,
            treated_subset_var = "age_case",
            treated_subset_event_time = 0,
            heterogeneous_only = TRUE,
            anticipation = 0
        )

    # Winsorization level in percent with the decimal point removed, for use
    # in the file name (e.g. 0.005 -> "05", 0.01 -> "1").
    winsorize_quantile_text <-
        gsub("\\.", "", as.character(winsorize_quantile * 100))

    saveRDS(
        did_results,
        sprintf(
            "~/estimation-output/event_study_estimates_%s_winsorize_top%s.rds",
            outcome,
            winsorize_quantile_text
        )
    )

    winsorize_quantile
}

# Run one event study per winsorization level, each in its own forked worker.
# `winsorize_quantiles` is expected to have been defined by the command-line
# arguments evaluated at the top of the script.
if (
    !exists("winsorize_quantiles") ||
        !is.numeric(winsorize_quantiles) ||
        length(winsorize_quantiles) == 0L
) {
    stop(
        "`winsorize_quantiles` must be a non-empty numeric vector; pass it ",
        "via '--args winsorize_quantiles=c(...)'.",
        call. = FALSE
    )
}

num_cores <- length(winsorize_quantiles)
# parallel:: is spelled out because no library(parallel) call is visible in
# this script.
event_study_runs <- parallel::mcmapply(
    MakeEventStudy,
    winsorize_quantiles,
    SIMPLIFY = FALSE,
    mc.silent = FALSE,
    mc.cores = num_cores,
    mc.set.seed = TRUE
)
# Keep the per-level return values in the batch output.
print(event_study_runs)

# A worker that errors hands back a "try-error" object rather than stopping
# the parent process, so check explicitly and fail the batch run.
failed_runs <-
    vapply(event_study_runs, inherits, logical(1), what = "try-error")
if (any(failed_runs)) {
    stop(
        "MakeEventStudy failed for winsorize_quantiles = ",
        paste(winsorize_quantiles[failed_runs], collapse = ", "),
        call. = FALSE
    )
}